In [ ]:
from pprint import pprint

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from folium.plugins import HeatMap, MarkerCluster
In [ ]:
# Lift the column display cap so every column is shown when a frame is rendered.
pd.set_option('display.max_columns', None)
# Read the mobility dataset from the notebook's working directory.
df = pd.read_csv('mobility.csv')
In [ ]:
# Preview the first rows of the loaded frame.
df.head()
Out[ ]:
ID Name Mobility State Population Urban Black Seg_racial Seg_income Seg_poverty Seg_affluence Commute Income Gini Share01 Gini_99 Middle_class Local_tax_rate Local_gov_spending Progressivity EITC School_spending Student_teacher_ratio Test_scores HS_dropout Colleges Tuition Graduation Labor_force_participation Manufacturing Chinese_imports Teenage_labor Migration_in Migration_out Foreign_born Social_capital Religious Violent_crime Single_mothers Divorced Married Longitude Latitude
0 100 Johnson City 0.062199 TN 576081 1 0.021 0.090 0.035 0.030 0.038 0.325 31560 0.468 13.459 0.333 0.548 0.020 1886.0 0.0 0.0 5.185 NaN 2.728 -0.015 0.014 4817.0 -0.002 0.587 0.237 5.294 0.004 0.006 0.005 0.012 -0.298 0.514 0.001 0.190 0.110 0.601 -82.436386 36.470371
1 200 Morristown 0.053652 TN 227816 1 0.020 0.093 0.026 0.028 0.025 0.276 29959 0.435 10.631 0.328 0.538 0.023 2004.0 0.0 0.0 4.506 NaN -3.400 -0.024 0.009 4762.0 -0.101 0.625 0.238 3.030 0.005 0.016 0.014 0.023 -0.767 0.544 0.002 0.185 0.116 0.613 -83.407249 36.096539
2 301 Middlesborough 0.072635 TN 66708 0 0.015 0.064 0.024 0.015 0.026 0.359 22328 0.441 10.691 0.334 0.467 0.015 1190.0 0.0 0.0 5.614 15.1 -9.315 -0.005 0.045 11840.0 0.111 0.479 0.234 2.063 0.003 0.008 0.012 0.007 -1.270 0.668 0.001 0.211 0.113 0.590 -83.535332 36.551540
3 302 Knoxville 0.056281 TN 727600 1 0.056 0.210 0.092 0.084 0.102 0.269 35884 0.508 15.080 0.358 0.504 0.019 2357.0 0.0 0.0 4.900 NaN -6.032 -0.011 0.011 3480.0 -0.024 0.615 0.146 1.078 0.004 0.016 0.014 0.020 -0.222 0.602 0.001 0.206 0.114 0.575 -84.242790 35.952259
4 401 Winston-Salem 0.044801 NC 493180 1 0.174 0.262 0.072 0.061 0.081 0.292 38892 0.466 11.917 0.346 0.500 0.018 1891.0 1.0 0.0 5.463 15.4 -2.297 0.023 0.014 9715.0 0.052 0.656 0.215 1.016 0.004 0.022 0.019 0.053 -0.018 0.488 0.003 0.220 0.092 0.586 -80.505333 36.081276
In [ ]:
# (rows, columns) of the frame.
df.shape
Out[ ]:
(741, 43)
In [ ]:
# Column labels currently present on df.
df.columns
Out[ ]:
Index(['ID', 'Name', 'Mobility', 'State', 'Population', 'Urban', 'Black',
       'Seg_racial', 'Seg_income', 'Seg_poverty', 'Seg_affluence', 'Commute',
       'Income', 'Gini', 'Share01', 'Gini_99', 'Middle_class',
       'Local_tax_rate', 'Local_gov_spending', 'Progressivity', 'EITC',
       'School_spending', 'Student_teacher_ratio', 'Test_scores', 'HS_dropout',
       'Colleges', 'Tuition', 'Graduation', 'Labor_force_participation',
       'Manufacturing', 'Chinese_imports', 'Teenage_labor', 'Migration_in',
       'Migration_out', 'Foreign_born', 'Social_capital', 'Religious',
       'Violent_crime', 'Single_mothers', 'Divorced', 'Married', 'Longitude',
       'Latitude', 'count', 'racial_pop', 'poor_pop'],
      dtype='object')
In [ ]:
# Per-column dtype and non-null count summary.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 741 entries, 0 to 740
Data columns (total 43 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   ID                         741 non-null    int64  
 1   Name                       741 non-null    object 
 2   Mobility                   729 non-null    float64
 3   State                      741 non-null    object 
 4   Population                 741 non-null    int64  
 5   Urban                      741 non-null    int64  
 6   Black                      741 non-null    float64
 7   Seg_racial                 741 non-null    float64
 8   Seg_income                 741 non-null    float64
 9   Seg_poverty                741 non-null    float64
 10  Seg_affluence              741 non-null    float64
 11  Commute                    741 non-null    float64
 12  Income                     741 non-null    int64  
 13  Gini                       741 non-null    float64
 14  Share01                    709 non-null    float64
 15  Gini_99                    709 non-null    float64
 16  Middle_class               709 non-null    float64
 17  Local_tax_rate             740 non-null    float64
 18  Local_gov_spending         739 non-null    float64
 19  Progressivity              741 non-null    float64
 20  EITC                       741 non-null    float64
 21  School_spending            731 non-null    float64
 22  Student_teacher_ratio      711 non-null    float64
 23  Test_scores                705 non-null    float64
 24  HS_dropout                 593 non-null    float64
 25  Colleges                   584 non-null    float64
 26  Tuition                    580 non-null    float64
 27  Graduation                 581 non-null    float64
 28  Labor_force_participation  741 non-null    float64
 29  Manufacturing              741 non-null    float64
 30  Chinese_imports            722 non-null    float64
 31  Teenage_labor              709 non-null    float64
 32  Migration_in               724 non-null    float64
 33  Migration_out              724 non-null    float64
 34  Foreign_born               741 non-null    float64
 35  Social_capital             722 non-null    float64
 36  Religious                  741 non-null    float64
 37  Violent_crime              714 non-null    float64
 38  Single_mothers             741 non-null    float64
 39  Divorced                   741 non-null    float64
 40  Married                    741 non-null    float64
 41  Longitude                  741 non-null    float64
 42  Latitude                   741 non-null    float64
dtypes: float64(37), int64(4), object(2)
memory usage: 249.1+ KB
In [ ]:
# Number of distinct values in each column.
df.nunique()
Out[ ]:
ID                           741
Name                         685
Mobility                     721
State                         51
Population                   739
Urban                          2
Black                        226
Seg_racial                   291
Seg_income                   125
Seg_poverty                  113
Seg_affluence                136
Commute                      404
Income                       719
Gini                         290
Share01                      690
Gini_99                      215
Middle_class                 283
Local_tax_rate                52
Local_gov_spending           625
Progressivity                 18
EITC                          15
School_spending              671
Student_teacher_ratio        120
Test_scores                  695
HS_dropout                   102
Colleges                      75
Tuition                      528
Graduation                   343
Labor_force_participation    231
Manufacturing                287
Chinese_imports              606
Teenage_labor                  7
Migration_in                  58
Migration_out                 45
Foreign_born                 143
Social_capital               658
Religious                    431
Violent_crime                  9
Single_mothers               211
Divorced                      91
Married                      192
Longitude                    741
Latitude                     741
dtype: int64
In [ ]:
# Count of missing values in each column.
df.isna().sum()
Out[ ]:
ID                             0
Name                           0
Mobility                      12
State                          0
Population                     0
Urban                          0
Black                          0
Seg_racial                     0
Seg_income                     0
Seg_poverty                    0
Seg_affluence                  0
Commute                        0
Income                         0
Gini                           0
Share01                       32
Gini_99                       32
Middle_class                  32
Local_tax_rate                 1
Local_gov_spending             2
Progressivity                  0
EITC                           0
School_spending               10
Student_teacher_ratio         30
Test_scores                   36
HS_dropout                   148
Colleges                     157
Tuition                      161
Graduation                   160
Labor_force_participation      0
Manufacturing                  0
Chinese_imports               19
Teenage_labor                 32
Migration_in                  17
Migration_out                 17
Foreign_born                   0
Social_capital                19
Religious                      0
Violent_crime                 27
Single_mothers                 0
Divorced                       0
Married                        0
Longitude                      0
Latitude                       0
dtype: int64

Number of Entries Heatmap

In [ ]:
# Heatmap of where the dataset's entries are located: every row contributes
# one point of equal weight at its (Latitude, Longitude).
# `HeatMap` is imported with the other folium plugins in the top import cell.
property_map = folium.Map(
    # Centre the view on the mean coordinate of all rows.
    location=[df['Latitude'].mean(), df['Longitude'].mean()],
    # The Stamen tile sets are not among the tiles bundled with current folium
    # releases; passing the name together with a hand-written `attr` only
    # silences the error and leaves the basemap empty. Use the built-in
    # OpenStreetMap tiles, as the marker maps in this notebook do.
    tiles='OpenStreetMap',
    zoom_start=4,
    control_scale=True,
)

# Constant weight column, summed per coordinate pair below.
# This adds a `count` column to df itself.
df['count'] = 1
entry_counts = (
    df[['Latitude', 'Longitude', 'count']]
    .groupby(['Latitude', 'Longitude'])
    .sum()
    .reset_index()
)

# `opacity` is not a HeatMap option (and 10 is outside the 0-1 range an
# opacity would take), so it is dropped; `min_opacity` is the supported knob.
property_heatmap = HeatMap(
    data=entry_counts.values.tolist(),
    name='Heatmap',
    radius=10,
    min_opacity=0.1,
    max_zoom=16,
).add_to(property_map)

# Layer toggle so the heat layer can be switched on and off.
folium.LayerControl().add_to(property_map)
property_map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Population Heatmap

In [ ]:
# Heatmap of the rows weighted by their `Population` value.
# `HeatMap` is imported with the other folium plugins in the top import cell.
property_map = folium.Map(
    # Centre the view on the mean coordinate of all rows.
    location=[df['Latitude'].mean(), df['Longitude'].mean()],
    # The Stamen tile sets are not among the tiles bundled with current folium
    # releases; passing the name together with a hand-written `attr` only
    # silences the error and leaves the basemap empty. Use the built-in
    # OpenStreetMap tiles, as the marker maps in this notebook do.
    tiles='OpenStreetMap',
    zoom_start=4,
    control_scale=True,
)

# Total population per coordinate pair, as [lat, lon, weight] triples.
# NOTE(review): the weights are raw population counts; heat intensities may
# saturate at this scale — consider normalising the weights. TODO confirm.
population_by_location = (
    df[['Latitude', 'Longitude', 'Population']]
    .groupby(['Latitude', 'Longitude'])
    .sum()
    .reset_index()
)

# `opacity` is not a HeatMap option (and 10 is outside the 0-1 range an
# opacity would take), so it is dropped; `min_opacity` is the supported knob.
property_heatmap = HeatMap(
    data=population_by_location.values.tolist(),
    name='Heatmap',
    radius=10,
    min_opacity=0.1,
    max_zoom=16,
).add_to(property_map)

# Layer toggle so the heat layer can be switched on and off.
folium.LayerControl().add_to(property_map)
property_map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# Basemap with folium's default tiles, centred on the mean coordinate of all rows.
map_centre = [df['Latitude'].mean(), df['Longitude'].mean()]
property_map = folium.Map(location=map_centre, zoom_start=4.5, control_scale=True)

# One faint blue circle per row; clicking a circle shows that row's Population.
for lat, lon, population in zip(df.Latitude, df.Longitude, df.Population):
    circle = folium.CircleMarker(
        location=[lat, lon],
        radius=4,
        popup=population,
        color='blue',
        opacity=0.2,
    )
    circle.add_to(property_map)

property_map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# Clustered marker map on folium's default basemap, centred on the mean coordinate.
property_map = folium.Map(
    location=[df['Latitude'].mean(), df['Longitude'].mean()],
    zoom_start=4,
    control_scale=True,
)

# One marker per row, collected in a MarkerCluster; each popup shows the
# row's Population value.
mc = MarkerCluster()
for row in df.itertuples():
    folium.Marker(
        location=[row.Latitude, row.Longitude],
        popup=row.Population,
    ).add_to(mc)
mc.add_to(property_map)
property_map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# Add two derived columns to df: Population multiplied by Seg_racial and by
# Seg_poverty respectively.
# NOTE(review): the `Seg_*` columns look like segregation indices, in which
# case these products are population-weighted index values rather than head
# counts, despite the `_pop` suffix — confirm against the data dictionary.
df['racial_pop'] = df['Population'].mul(df['Seg_racial'])
df['poor_pop'] = df['Population'].mul(df['Seg_poverty'])
df
Out[ ]:
ID Name Mobility State Population Urban Black Seg_racial Seg_income Seg_poverty Seg_affluence Commute Income Gini Share01 Gini_99 Middle_class Local_tax_rate Local_gov_spending Progressivity EITC School_spending Student_teacher_ratio Test_scores HS_dropout Colleges Tuition Graduation Labor_force_participation Manufacturing Chinese_imports Teenage_labor Migration_in Migration_out Foreign_born Social_capital Religious Violent_crime Single_mothers Divorced Married Longitude Latitude count racial_pop poor_pop
0 100 Johnson City 0.062199 TN 576081 1 0.021 0.090 0.035 0.030 0.038 0.325 31560 0.468 13.459 0.333 0.548 0.020 1886.0 0.0 0.00 5.185 NaN 2.728 -0.015 0.014 4817.0 -0.002 0.587 0.237 5.294 0.004 0.006 0.005 0.012 -0.298 0.514 0.001 0.190 0.110 0.601 -82.436386 36.470371 1 51847.290 17282.430
1 200 Morristown 0.053652 TN 227816 1 0.020 0.093 0.026 0.028 0.025 0.276 29959 0.435 10.631 0.328 0.538 0.023 2004.0 0.0 0.00 4.506 NaN -3.400 -0.024 0.009 4762.0 -0.101 0.625 0.238 3.030 0.005 0.016 0.014 0.023 -0.767 0.544 0.002 0.185 0.116 0.613 -83.407249 36.096539 1 21186.888 6378.848
2 301 Middlesborough 0.072635 TN 66708 0 0.015 0.064 0.024 0.015 0.026 0.359 22328 0.441 10.691 0.334 0.467 0.015 1190.0 0.0 0.00 5.614 15.1 -9.315 -0.005 0.045 11840.0 0.111 0.479 0.234 2.063 0.003 0.008 0.012 0.007 -1.270 0.668 0.001 0.211 0.113 0.590 -83.535332 36.551540 1 4269.312 1000.620
3 302 Knoxville 0.056281 TN 727600 1 0.056 0.210 0.092 0.084 0.102 0.269 35884 0.508 15.080 0.358 0.504 0.019 2357.0 0.0 0.00 4.900 NaN -6.032 -0.011 0.011 3480.0 -0.024 0.615 0.146 1.078 0.004 0.016 0.014 0.020 -0.222 0.602 0.001 0.206 0.114 0.575 -84.242790 35.952259 1 152796.000 61118.400
4 401 Winston-Salem 0.044801 NC 493180 1 0.174 0.262 0.072 0.061 0.081 0.292 38892 0.466 11.917 0.346 0.500 0.018 1891.0 1.0 0.00 5.463 15.4 -2.297 0.023 0.014 9715.0 0.052 0.656 0.215 1.016 0.004 0.022 0.019 0.053 -0.018 0.488 0.003 0.220 0.092 0.586 -80.505333 36.081276 1 129213.160 30083.980
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
736 39205 John Day 0.115854 OR 7935 0 0.001 0.002 0.002 0.004 0.001 0.579 32736 0.311 4.400 0.267 0.584 0.013 3429.0 0.0 1.19 7.887 15.1 12.204 -0.039 NaN NaN NaN 0.615 0.099 0.083 0.005 0.003 0.015 0.015 0.208 0.331 0.000 0.195 0.108 0.628 -118.531197 44.594025 1 15.870 31.740
737 39301 Friday Harbor 0.101695 WA 14077 0 0.002 0.010 0.012 0.022 0.001 0.628 58628 0.796 60.527 0.190 0.560 0.018 2971.0 0.0 0.00 6.766 18.3 -3.884 NaN NaN NaN NaN 0.587 0.063 0.821 0.007 0.024 0.021 0.060 2.716 0.171 0.000 0.219 0.148 0.604 -123.052956 48.525379 1 140.770 309.694
738 39302 Bellingham 0.115575 WA 166814 1 0.006 0.057 0.046 0.051 0.042 0.418 35491 0.416 12.245 0.294 0.564 0.021 2615.0 0.0 0.00 5.912 21.1 5.120 NaN 0.024 2682.0 0.186 0.665 0.122 1.324 0.005 0.034 0.028 0.098 0.063 0.294 0.001 0.195 0.099 0.538 -121.263443 48.831154 1 9508.398 8507.514
739 39303 Port Angeles 0.085840 WA 90478 0 0.007 0.122 0.025 0.028 0.024 0.486 39625 0.414 12.848 0.285 0.575 0.015 3201.0 0.0 0.00 6.609 19.5 3.012 NaN 0.011 1641.0 -0.183 0.522 0.077 0.165 0.004 0.034 0.021 0.043 0.476 0.260 0.001 0.235 0.124 0.598 -123.544647 47.912067 1 11038.316 2533.384
740 39400 Seattle 0.108551 WA 3775744 1 0.043 0.107 0.083 0.072 0.090 0.240 44591 0.424 18.700 0.237 0.514 0.021 3024.0 0.0 0.00 6.604 21.0 -0.108 NaN 0.010 5596.0 0.038 0.682 0.130 0.677 0.004 0.023 0.022 0.113 -0.125 0.323 0.002 0.195 0.116 0.544 -122.012230 47.644394 1 404004.608 271853.568

741 rows × 46 columns

In [ ]:
# Per-state means of every numeric column.
# `GroupBy.mean` takes `numeric_only` as its first positional argument, so the
# former `.mean('racial_pop')` / `.mean('poor_pop')` never selected a column:
# the string merely acted as a truthy flag. State the intent explicitly.
df_groupby_racial_perc = df.groupby('State').mean(numeric_only=True)
# mean(racial_pop) / mean(Population) equals sum(Population * Seg_racial) /
# sum(Population), i.e. the population-weighted average of Seg_racial within
# the state, scaled to 0-100 and rounded to two decimals.
df_groupby_racial_perc['Perc_Racial'] = (
    df_groupby_racial_perc['racial_pop'] * 100 / df_groupby_racial_perc['Population']
).round(2)

df_groupby_poor_perc = df.groupby('State').mean(numeric_only=True)
# Same construction for Seg_poverty.
df_groupby_poor_perc['Perc_Poor'] = (
    df_groupby_poor_perc['poor_pop'] * 100 / df_groupby_poor_perc['Population']
).round(2)
In [ ]:
# Bar chart of Perc_Poor by state. Perc_Poor is the population-weighted
# average of the Seg_poverty column (x100), not a share of poor residents,
# so the axis is labelled with what is actually plotted.
fig, ax = plt.subplots(figsize=(10, 8))
df_groupby_poor_perc['Perc_Poor'].plot(kind='bar', ax=ax)
ax.set_xlabel("States")
ax.set_ylabel("Population-weighted Seg_poverty (x100)")
ax.set_title("Population-weighted Seg_poverty by state")
plt.show()
In [ ]:
# Clustered marker map on folium's default basemap, centred on the mean coordinate.
map_centre = [df['Latitude'].mean(), df['Longitude'].mean()]
property_map = folium.Map(location=map_centre, zoom_start=4, control_scale=True)

# One marker per row; each popup shows the row's `racial_pop` value
# (Population x Seg_racial).
mc = MarkerCluster()
for row in df.itertuples():
    marker = folium.Marker(location=[row.Latitude, row.Longitude], popup=row.racial_pop)
    marker.add_to(mc)
mc.add_to(property_map)
property_map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# Bar chart of Perc_Racial by state. Perc_Racial is the population-weighted
# average of the Seg_racial column (x100); it says nothing about the share of
# "racist people", so the axis is labelled with what is actually plotted.
fig, ax = plt.subplots(figsize=(10, 8))
df_groupby_racial_perc['Perc_Racial'].plot(kind='bar', ax=ax)
ax.set_xlabel("States")
ax.set_ylabel("Population-weighted Seg_racial (x100)")
ax.set_title("Population-weighted Seg_racial by state")
plt.show()
In [ ]:
# Clustered marker map on folium's default basemap, centred on the mean coordinate.
property_map = folium.Map(
    location=[df['Latitude'].mean(), df['Longitude'].mean()],
    zoom_start=4,
    control_scale=True,
)

# One marker per row; each popup shows the row's `poor_pop` value
# (Population x Seg_poverty).
mc = MarkerCluster()
for row in df.itertuples():
    folium.Marker(
        location=[row.Latitude, row.Longitude],
        popup=row.poor_pop,
    ).add_to(mc)
mc.add_to(property_map)
property_map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# Partition rows around the mean of `Income`. Both comparisons are strict,
# so a row exactly equal to the mean lands in neither frame.
income_mean = df['Income'].mean()
df_income_gt_mean = df.loc[df['Income'].gt(income_mean)]
df_income_lt_mean = df.loc[df['Income'].lt(income_mean)]
In [ ]:
# Urban/rural split among rows whose Income is below the dataset mean.
# The Urban flag is mapped to readable labels (1 = urban, 0 = rural, matching
# how df_urban / df_rural are built elsewhere in this notebook), and the title
# says which income group is shown: the below- and above-mean pies previously
# shared the same "Distribution of Income" title.
fig, ax = plt.subplots(figsize=(6, 6))
(
    df_income_lt_mean['Urban']
    .map({1: 'Urban', 0: 'Rural'})
    .value_counts()
    .plot(kind='pie', autopct='%1.2f%%', ax=ax)
)
ax.legend(loc=1)
ax.set_title("Urban vs. rural split - Income below the mean")
plt.show()
In [ ]:
# Urban/rural split among rows whose Income is above the dataset mean.
# The Urban flag is mapped to readable labels (1 = urban, 0 = rural, matching
# how df_urban / df_rural are built elsewhere in this notebook), and the title
# says which income group is shown: the below- and above-mean pies previously
# shared the same "Distribution of Income" title.
fig, ax = plt.subplots(figsize=(6, 6))
(
    df_income_gt_mean['Urban']
    .map({1: 'Urban', 0: 'Rural'})
    .value_counts()
    .plot(kind='pie', autopct='%1.2f%%', ax=ax)
)
ax.legend(loc=1)
ax.set_title("Urban vs. rural split - Income above the mean")
plt.show()
In [ ]:
# Clustered marker map of `racial_pop`; this repeats the racial_pop cluster
# map built in an earlier cell of this notebook.
centre_lat = df['Latitude'].mean()
centre_lon = df['Longitude'].mean()
property_map = folium.Map(
    location=[centre_lat, centre_lon],
    zoom_start=4,
    control_scale=True,
)

# One marker per row; each popup shows the row's `racial_pop` value.
mc = MarkerCluster()
for row in df.itertuples():
    point = [row.Latitude, row.Longitude]
    folium.Marker(location=point, popup=row.racial_pop).add_to(mc)
mc.add_to(property_map)
property_map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# Per-state means of every numeric column.
# `GroupBy.mean` takes `numeric_only` as its first positional argument, so the
# former `.mean('Income')` did not select the Income column: the string merely
# acted as a truthy flag. State the intent explicitly.
df_groupby_state = df.groupby('State').mean(numeric_only=True)

# Basemap with folium's default tiles, centred on the mean coordinate of all rows.
property_map = folium.Map(
    location=[df['Latitude'].mean(), df['Longitude'].mean()],
    zoom_start=4,
    control_scale=True,
)

# One circle per state, placed at the mean latitude/longitude of that state's
# rows; the popup shows the state's mean Income.
for lat, lon, mean_income in zip(
    df_groupby_state['Latitude'],
    df_groupby_state['Longitude'],
    df_groupby_state['Income'],
):
    folium.CircleMarker(
        location=[lat, lon],
        radius=3,
        popup=mean_income,
        color='blue',
        opacity=0.2,
    ).add_to(property_map)

property_map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# Split rows on the Urban flag (1 = urban, 0 = rural).
df_urban = df[df['Urban'] == 1]
df_rural = df[df['Urban'] == 0]

# Per-state means of every numeric column for each subset.
# `GroupBy.mean` takes `numeric_only` as its first positional argument, so the
# former `.mean('Black')` did not select the Black column: the string merely
# acted as a truthy flag. State the intent explicitly.
df_rural_groupby_state = df_rural.groupby('State').mean(numeric_only=True)
df_urban_groupby_state = df_urban.groupby('State').mean(numeric_only=True)
In [ ]:
# Heatmap of the per-state mean `Black` value over rural rows, placed at each
# state's mean rural coordinate.
# `HeatMap` is imported with the other folium plugins in the top import cell.
property_map = folium.Map(
    location=[
        df_rural_groupby_state['Latitude'].mean(),
        df_rural_groupby_state['Longitude'].mean(),
    ],
    # The Stamen tile sets are not among the tiles bundled with current folium
    # releases; passing the name together with a hand-written `attr` only
    # silences the error and leaves the basemap empty. Use the built-in
    # OpenStreetMap tiles, as the marker maps in this notebook do.
    tiles='OpenStreetMap',
    zoom_start=4,
    control_scale=True,
)

# [lat, lon, weight] triples, summed per coordinate pair.
rural_black_points = (
    df_rural_groupby_state[['Latitude', 'Longitude', 'Black']]
    .groupby(['Latitude', 'Longitude'])
    .sum()
    .reset_index()
)

# `opacity` is not a HeatMap option (and 20 is outside the 0-1 range an
# opacity would take), so it is dropped; `min_opacity` is the supported knob.
property_heatmap = HeatMap(
    data=rural_black_points.values.tolist(),
    name='Heatmap',
    radius=10,
    min_opacity=0.1,
    max_zoom=16,
).add_to(property_map)

# Layer toggle so the heat layer can be switched on and off.
folium.LayerControl().add_to(property_map)
property_map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# Heatmap of the per-state mean `Black` value over urban rows, placed at each
# state's mean urban coordinate.
# `HeatMap` is imported with the other folium plugins in the top import cell.
property_map = folium.Map(
    location=[
        df_urban_groupby_state['Latitude'].mean(),
        df_urban_groupby_state['Longitude'].mean(),
    ],
    # The Stamen tile sets are not among the tiles bundled with current folium
    # releases; passing the name together with a hand-written `attr` only
    # silences the error and leaves the basemap empty. Use the built-in
    # OpenStreetMap tiles, as the marker maps in this notebook do.
    tiles='OpenStreetMap',
    zoom_start=4,
    control_scale=True,
)

# [lat, lon, weight] triples, summed per coordinate pair.
urban_black_points = (
    df_urban_groupby_state[['Latitude', 'Longitude', 'Black']]
    .groupby(['Latitude', 'Longitude'])
    .sum()
    .reset_index()
)

# `opacity` is not a HeatMap option (and 20 is outside the 0-1 range an
# opacity would take), so it is dropped; `min_opacity` is the supported knob.
property_heatmap = HeatMap(
    data=urban_black_points.values.tolist(),
    name='Heatmap',
    radius=10,
    min_opacity=0.1,
    max_zoom=16,
).add_to(property_map)

# Layer toggle so the heat layer can be switched on and off.
folium.LayerControl().add_to(property_map)
property_map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
# Pairwise scatter/histogram grid over a hand-picked set of numeric columns.
# `plt` and `sns` come from the top import cell; the mid-notebook re-imports
# that used to live here are gone.
pairplot_columns = [
    'Seg_income', 'Seg_poverty', 'Black', 'Seg_racial', 'Seg_affluence',
    'Commute', 'Income', 'Gini', 'Share01', 'Gini_99',
    'Middle_class', 'Local_tax_rate', 'Local_gov_spending', 'Progressivity',
    'School_spending', 'Labor_force_participation',
    'Violent_crime', 'Religious', 'Social_capital', 'Teenage_labor',
]
df_wo_category = df[pairplot_columns]
sns.pairplot(df_wo_category)
# Save before plt.show(): afterwards the current figure may no longer hold the plot.
plt.savefig('correlation_plot.png')
plt.show()